# Import useful modules
import numpy as np
import pandas as pd
import scanpy as sc
import os
#import igraph
import matplotlib.pyplot as plt
import seaborn
sc.settings.verbosity = 3 # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_versions()
sc.settings.set_figure_params(dpi=90)
# Load the integrated and annotated dataset
adata_ann = sc.read_h5ad('/Data/Annotated_dataset_v1.h5ad')
# Load non-integrated & pre-processed dataset
adata_raw = sc.read_h5ad('/Data/PreProcessed_preliminary_dataset.h5ad')
# Transfer annotation onto a non-data integrated anndata object.
adata = adata_raw[adata_ann.obs.index.tolist(), ]
adata.obs['cell_type'] = adata_ann.obs['cell_type']
sup_cell_bool = []
for x in adata.obs['cell_type_small']:
sup_cell_bool = sup_cell_bool + [x in ['Suprabasal', 'Suprabasal N']]
list_of_cell_names = adata.obs.loc[sup_cell_bool, :].index.tolist()
adata = adata[list_of_cell_names, ]
adata.shape
adata = sc.AnnData(adata)
sc.tl.pca(adata, svd_solver='arpack')
sc.pl.pca_variance_ratio(adata, log = False)
sc.pl.pca_loadings(adata, components=list(range(0,16)))
sc.pp.neighbors(adata, n_neighbors=100, n_pcs=5)
sc.tl.umap(adata)
sc.pl.umap(adata, color=['manip'], edges = False)
sc.pl.umap(adata, color=['cell_type_small', 'position'], edges = False)
communities, graph, Q = sc.external.tl.phenograph(adata.obsm['X_pca'][:,0:5], k=100)
#adata.obs['phenograph_3'] = pd.Categorical(communities)
adata.obs.insert(5, 'phenograph_3', pd.Categorical(communities))
sc.pl.umap(adata, color=['phenograph_3', 'manip'])
new_cluster_names = {
'0':'Suprabasal', '1':'Suprabasal', '2':'Suprabasal N', '3':'Suprabasal', '4':'Suprabasal', # 0-4
'5':'Suprabasal','6':'Suprabasal', '7':'Suprabasal N', '8':'Suprabasal N', '9':'Suprabasal mix',
'10':'Suprabasal mix', '11':'Suprabasal mix', '12':'Suprabasal mix', '13':'Suprabasal'
}
sc.pl.umap(adata, color=['cell_type_suprabasal_3'])
Suprabasal mix cells corresponding to KRT13+ cells
sc.tl.rank_genes_groups(adata, groupby = 'cell_type_suprabasal_3', method='wilcoxon')
sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False)
sc.pl.umap(adata, color=['KRT13', 'HSPB1', 'S100A11', 'CSTA'])
sc.pl.umap(adata, color=['SERPINF1', 'HMGN3', 'GLUL', 'LDHB'])
sc.tl.rank_genes_groups(adata, groupby = 'cell_type_suprabasal_3', method='wilcoxon',
groups= ['Suprabasal'], reference = 'Suprabasal N')
sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False)
sc.pl.umap(adata, color=['CHST9', 'NBEAL1', 'MGST1', 'CYB5A'])
sc.tl.rank_genes_groups(adata, groupby = 'cell_type_suprabasal_3', method='wilcoxon',
groups= ['Suprabasal N'], reference = 'Suprabasal')
sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False)
sc.pl.umap(adata, color=['S100A4', 'IFI27', 'ALDH3A1', 'AKR1C2'])
adata.obs.to_csv(path_or_buf = '/data/deprez_data/HCA/Analysis/FullDataset_v6/Suprabasal_metadata.tsv',
sep = '\t', index = False)
adata.write('/data/deprez_data/HCA/Analysis/FullDataset_v6/anndata_V6_Suprabasal.h5ad')
cell_bool = []
for x in adata.obs['cell_type']:
cell_bool = cell_bool + [x in ['Suprabasal', 'Suprabasal N', 'Secretory', 'Secretory N']]
list_of_cell_names = adata.obs.loc[cell_bool, :].index.tolist()
adata = adata[list_of_cell_names, ]
adata.shape
adata = sc.AnnData(adata)
sc.tl.pca(adata, svd_solver='arpack')
sc.pl.pca_variance_ratio(adata, log = False)
sc.pl.pca_loadings(adata, components=list(range(0,16)))
sc.pp.neighbors(adata, n_neighbors=100, n_pcs=5)
sc.tl.umap(adata)
sc.pl.umap(adata, color=['manip'], edges = False)
sc.pl.umap(adata, color=['cell_type', 'cell_type_small'], edges = False)
sc.pl.umap(adata, color=['position', 'donor'], edges = False)
communities, graph, Q = sc.external.tl.phenograph(adata.obsm['X_pca'][:,0:5], k=100)
#adata.obs['phenograph_SCs'] = pd.Categorical(communities)
adata.obs.insert(18, 'phenograph_SCs', pd.Categorical(communities))
sc.pl.umap(adata, color=['phenograph_SCs', 'position'])
cell_bool = []
for x in adata.obs['cell_type_small']:
cell_bool = cell_bool + [x in ['Suprabasal', 'Suprabasal N', 'Basal']]
list_of_cell_names = adata.obs.loc[cell_bool, :].index.tolist()
adata = adata[list_of_cell_names, ]
adata.shape
adata = sc.AnnData(adata)
sc.tl.pca(adata, svd_solver='arpack')
sc.pl.pca_variance_ratio(adata, log = False)
sc.pl.pca_loadings(adata, components=list(range(0,16)))
sc.pp.neighbors(adata, n_neighbors=100, n_pcs=5)
sc.tl.umap(adata)
sc.pl.umap(adata, color=['manip'], edges = False)
sc.pl.umap(adata, color=['cell_type', 'cell_type_small'], edges = False)
sc.pl.umap(adata, color=['position', 'donor'], edges = False)